By way of introduction…

quick/dirty v. involved/fancy

# Pull the dataset on tipping behavior straight out of the reshape2 namespace
tips <- reshape2::tips
# List the attributes of the tips dataset (names, row.names, class)
attributes(tips)
## $names
## [1] "total_bill" "tip"        "sex"        "smoker"     "day"       
## [6] "time"       "size"      
## 
## $row.names
##   [1] "1"   "2"   "3"   "4"   "5"   "6"   "7"   "8"   "9"   "10"  "11" 
##  [12] "12"  "13"  "14"  "15"  "16"  "17"  "18"  "19"  "20"  "21"  "22" 
##  [23] "23"  "24"  "25"  "26"  "27"  "28"  "29"  "30"  "31"  "32"  "33" 
##  [34] "34"  "35"  "36"  "37"  "38"  "39"  "40"  "41"  "42"  "43"  "44" 
##  [45] "45"  "46"  "47"  "48"  "49"  "50"  "51"  "52"  "53"  "54"  "55" 
##  [56] "56"  "57"  "58"  "59"  "60"  "61"  "62"  "63"  "64"  "65"  "66" 
##  [67] "67"  "68"  "69"  "70"  "71"  "72"  "73"  "74"  "75"  "76"  "77" 
##  [78] "78"  "79"  "80"  "81"  "82"  "83"  "84"  "85"  "86"  "87"  "88" 
##  [89] "89"  "90"  "91"  "92"  "93"  "94"  "95"  "96"  "97"  "98"  "99" 
## [100] "100" "101" "102" "103" "104" "105" "106" "107" "108" "109" "110"
## [111] "111" "112" "113" "114" "115" "116" "117" "118" "119" "120" "121"
## [122] "122" "123" "124" "125" "126" "127" "128" "129" "130" "131" "132"
## [133] "133" "134" "135" "136" "137" "138" "139" "140" "141" "142" "143"
## [144] "144" "145" "146" "147" "148" "149" "150" "151" "152" "153" "154"
## [155] "155" "156" "157" "158" "159" "160" "161" "162" "163" "164" "165"
## [166] "166" "167" "168" "169" "170" "171" "172" "173" "174" "175" "176"
## [177] "177" "178" "179" "180" "181" "182" "183" "184" "185" "186" "187"
## [188] "188" "189" "190" "191" "192" "193" "194" "195" "196" "197" "198"
## [199] "199" "200" "201" "202" "203" "204" "205" "206" "207" "208" "209"
## [210] "210" "211" "212" "213" "214" "215" "216" "217" "218" "219" "220"
## [221] "221" "222" "223" "224" "225" "226" "227" "228" "229" "230" "231"
## [232] "232" "233" "234" "235" "236" "237" "238" "239" "240" "241" "242"
## [243] "243" "244"
## 
## $class
## [1] "data.frame"
# Fit a linear model (an object of class "lm") of tip on the other tips columns
tips.reg <- lm(
  formula = tip ~ total_bill + sex + smoker + day + time + size,
  data = tips
)
# List the attributes of the fitted tips.reg object (names, class)
attributes(tips.reg)
## $names
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "contrasts"     "xlevels"       "call"          "terms"        
## [13] "model"        
## 
## $class
## [1] "lm"
# plot() dispatches on class: for tips this is the "data.frame" method
plot(tips)

# For tips.reg the "lm" method is used; `which` prints the first two plots only
plot(tips.reg, which = 1:2)

# lattice: try to plot an object of class "data.frame" automatically (fails)
xyplot(tips)
## Error: no applicable method for 'xyplot' applied to an object of class
## "data.frame"
# ggplot2: try to plot an object of class "data.frame" automatically (fails)
ggplot(data = tips) +
  geom_point()
## Error: 'where' is missing
# lattice: try to plot an object of class "lm" automatically (fails)
xyplot(tips.reg)
## Error: no applicable method for 'xyplot' applied to an object of class
## "lm"
# ggplot2: try to plot an object of class "lm" automatically (fails)
ggplot(data = tips.reg) +
  geom_point()
## Error: 'where' is missing

The Dataset

For more info: http://www.ipw.unibe.ch/content/team/klaus_armingeon/comparative_political_data_sets/index_eng.html

lattice v. ggplot2

lattice is:

a) faster (though only noticeable over many and large plots)

b) simpler (at first)

c) better at trellis graphs

d) able to do 3d graphs

ggplot2 is:

a) generally more elegant

b) more syntactically logical (and therefore simpler, once you learn it)

c) better at grouping

d) able to interface with maps

Basic usage: lattice

The general call for lattice graphics looks something like this:

graph_type(formula, data=, [options])

The specifics of the formula differ for each graph type, but the general format is straightforward

y             # Show the distribution of y

y~x           # Show the relationship between x and y 

y~x|A         # Show the relationship between x and y conditional on the values of A

y~x|A*B       # Show the relationship between x and y conditional on the combinations of A and B

z~y*x         # Show the 3D relationship between x, y, and z

Basic usage: ggplot2

The general call for ggplot2 graphics looks something like this:

ggplot(data=, aes(x=,y=, [options]))+geom_xxxx()+...+...+...

Note that ggplot2 builds a graph in layers within one continuing call (hence the endless +…+…+…), so each extra layer is really just another part of that same call

...+geom_xxxx(data=, aes(x=,y=,[options]),[options])+...+...+...

You can see the layering effect by comparing the same graph with different colors for each layer

# Growth layer in black, unemployment layer in red
ggplot(data = data, aes(x = year, y = realgdpgr)) +
  geom_point(color = "black") +
  geom_point(aes(x = year, y = unemp), color = "red")

# Same two layers with the colors swapped
ggplot(data = data, aes(x = year, y = realgdpgr)) +
  geom_point(color = "red") +
  geom_point(aes(x = year, y = unemp), color = "black")

Comparing lattice and ggplot

lattice v. ggplot2: Densities

# lattice
densityplot(~ vturn, data = data)

# ggplot2
ggplot(data = data, aes(x = vturn)) +
  geom_density()

lattice v. ggplot2: X-Y scatter plots

# lattice
xyplot(outlays ~ year, data = data)

# ggplot2
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point()

lattice v. ggplot2: X-Y line plots

# lattice: rows for the USA only, drawn as a line
xyplot(
  outlays ~ year,
  data = data[data$country == "USA", ],
  type = "l"
)

# ggplot2: rows for the USA only, drawn as a line
ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line()

lattice v. ggplot2: bar plots

# Create data.frame of average growth rates by country over time
# (missing growth values are dropped before averaging)
growth <- ddply(
  .data = data, .variables = .(country), summarize,
  mean = mean(realgdpgr, na.rm = TRUE)
)

barchart(mean ~ country, data = growth) # lattice

# `mean` already holds each bar's height, so draw it as-is with
# stat = "identity"; geom_bar()'s default stat counts rows and cannot be
# combined with a y aesthetic
ggplot(data = growth, aes(x = country, y = mean)) +
  geom_bar(stat = "identity") # ggplot2

lattice v. ggplot2: box plots

# lattice
bwplot(outlays ~ country, data = data)

# ggplot2
ggplot(data = data, aes(x = country, y = outlays)) +
  geom_boxplot()

lattice v. ggplot2: “trellis” plots

# lattice: one panel per country via the conditioning term
xyplot(outlays ~ year | country, data = data)

# ggplot2: one panel per country via facet_wrap()
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point() +
  facet_wrap(~ country)

lattice v. ggplot2: contour plots

# Load volcano contour data
data(volcano)
# Look at the first 10 rows and columns of the volcano matrix
volcano[1:10, 1:10]
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]  100  100  101  101  101  101  101  100  100   100
##  [2,]  101  101  102  102  102  102  102  101  101   101
##  [3,]  102  102  103  103  103  103  103  102  102   102
##  [4,]  103  103  104  104  104  104  104  103  103   103
##  [5,]  104  104  105  105  105  105  105  104  104   103
##  [6,]  105  105  105  106  106  106  106  105  105   104
##  [7,]  105  106  106  107  107  107  107  106  106   105
##  [8,]  106  107  107  108  108  108  108  107  107   106
##  [9,]  107  108  108  109  109  109  109  108  108   107
## [10,]  108  109  109  110  110  110  110  109  109   108
# Melt the matrix into long form with the reshape2 package
volcano3d <- melt(volcano)
# Look at the head of the melted volcano3d dataset
head(volcano3d)
##   Var1 Var2 value
## 1    1    1   100
## 2    2    1   101
## 3    3    1   102
## 4    4    1   103
## 5    5    1   104
## 6    6    1   105
# Rename the volcano3d columns
names(volcano3d) <- c("xvar", "yvar", "zvar")
# lattice
contourplot(zvar ~ xvar + yvar, data = volcano3d)

# ggplot2
ggplot(data = volcano3d, aes(x = xvar, y = yvar, z = zvar)) +
  geom_contour()

lattice v. ggplot2: tile/image/level plots

# lattice
levelplot(zvar ~ xvar + yvar, data = volcano3d)

# ggplot2: tiles filled according to zvar
ggplot(data = volcano3d, aes(x = xvar, y = yvar, z = zvar)) +
  geom_tile(aes(fill = zvar))

lattice: 3D plots

# Keep only the rows of the dataset for France
france.data <- data[data$country == "France", ]
cloud(outlays ~ year * realgdpgr, data = france.data)

# Keep only the rows for Greece, Portugal, Ireland, and Spain
pigs.data <- data[
  data$country %in% c("Greece", "Portugal", "Ireland", "Spain"),
]
# One 3D cloud per country
cloud(outlays ~ year * realgdpgr | country, data = pigs.data)

ggplot2: Panel plots

# One line per country, told apart by color
ggplot(data = pigs.data, aes(x = year, y = realgdpgr, color = country)) +
  geom_line()

lattice v. ggplot2: options [labeling]

# lattice: axis labels and title are arguments of the plotting call
xyplot(
  outlays ~ year,
  data = data,
  xlab = "Year", ylab = "Government Outlays", main = "Cool Graph"
)

# ggplot2: axis labels and title are added as extra layers.
# The x axis shows `year`, so it is labelled "Year" to match the lattice plot.
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point() +
  xlab(label = "Year") +
  ylab(label = "Government Outlays") +
  ggtitle(label = "Cool Graph")

lattice v. ggplot2: options [axis + size scaling]

# lattice: default point size
xyplot(outlays ~ year, data = data)

# lattice: points scaled up with cex
xyplot(outlays ~ year, data = data, cex = 2)

# lattice: points scaled down with cex
xyplot(outlays ~ year, data = data, cex = .5)

# ggplot2: default point size
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point()

# ggplot2: point size set to 3
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(size = 3)

# ggplot2: point size set to 1
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(size = 1)

lattice v. ggplot2: options [graphical parameters]

# lattice: color picked by position from the built-in colors() vector
xyplot(outlays ~ year, data = data, col = colors()[145])

# lattice: color picked by name
xyplot(outlays ~ year, data = data, col = "red")

# ggplot2: color picked by position from the built-in colors() vector
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(color = colors()[145])

# ggplot2: color picked by name
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(color = "red")

# lattice: plotting symbol chosen by number
xyplot(outlays ~ year, data = data, pch = 3)

xyplot(outlays ~ year, data = data, pch = 15)

# ggplot2: plotting symbol chosen by number
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = 3)

ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = 15)

# lattice: numbered symbols again, followed by characters used as symbols
xyplot(outlays ~ year, data = data, pch = 3)

xyplot(outlays ~ year, data = data, pch = 15)

xyplot(outlays ~ year, data = data, pch = "w")

xyplot(outlays ~ year, data = data, pch = "$", cex = 2)

# ggplot2: numbered symbols again, followed by characters used as symbols
ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = 3)

ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = 15)

ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = "w")

ggplot(data = data, aes(x = year, y = outlays)) +
  geom_point(shape = "$", size = 5)

# lattice: line type (lty) 1, 2, 3 for the USA rows
xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 1)

xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 2)

xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 3)

# lattice: line type 3 with increasing line width (lwd)
xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 3, lwd = 2)

xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 3, lwd = 3)

xyplot(outlays ~ year, data = data[data$country == "USA", ],
       type = "l", lty = 3, lwd = 4)

# ggplot2: linetype 1, 2, 3 for the USA rows
ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 1)

ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 2)

ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 3)

# ggplot2: linetype 3 with increasing line size
ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 3, size = 1)

ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 3, size = 1.5)

ggplot(data = data[data$country == "USA", ], aes(x = year, y = outlays)) +
  geom_line(linetype = 3, size = 2)

ggplot2 and the Grammar of Graphics

ggplot2 and the Grammar of Graphics

1) One or more statistics conveying information about the data (identities, means, medians, etc.)

2) A coordinate system that differentiates between the intersections of statistics (at most two for ggplot, three for lattice)

3) Geometries that differentiate between off-coordinate variation in kind

4) Scales that differentiate between off-coordinate variation in degree

Anatomy of aes()

ggplot(data=, aes(x=, y=, color=, linetype=, shape=, size=))

ggplot2 is optimized for showing variation on all four aesthetic types

# Color mapped to country inside aes(): differences in kind
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(aes(color = country))

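Note what happens when we specify the color parameter outside of the aesthetic operator, aes(). There, ggplot2 treats color as a fixed graphical parameter rather than a mapping to a variable, so a column name (bare or quoted) is rejected as invalid and only a literal color such as "red" works.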

# Bare column name outside aes() (fails)
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(color = country)
## Error: object 'country' not found
# Quoted column name outside aes() (fails)
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(color = "country")
## Error: invalid color name 'country'
# A literal color name outside aes()
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(color = "red")

# Line type mapped to country: differences in kind
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(aes(linetype = country))

# Point shape mapped to country: differences in kind
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(shape = country))

# Color mapped to real GDP growth: differences in degree
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(color = realgdpgr))

# Point size mapped to real GDP growth: differences in degree
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(size = realgdpgr))

# Several non-cartesian aesthetics at once (kind via color, degree via point size)
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(color = country, size = realgdpgr))

Fitted lines and curves with ggplot2

# Baseline scatter plot with no fitted line
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point()

# Add linear model (lm) smoother
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point() +
  geom_smooth(method = "lm")

# Add local (loess) smoother, span of 0.75
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point() +
  geom_smooth(method = "loess", span = .75)

# Add local (loess) smoother, span of 0.25
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point() +
  geom_smooth(method = "loess", span = .25)

# Add linear model (lm) smoother, no standard error shading
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)

# Add local (loess) smoother, no standard error shading
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point() +
  geom_smooth(method = "loess", se = FALSE)

# Add a local (loess) smoother for each country; the method is named
# explicitly so the plot does not depend on geom_smooth()'s automatic choice
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(color = country)) +
  geom_smooth(aes(color = country), method = "loess")

# Add a local (loess) smoother for each country, no standard error shading
ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(color = country, size = realgdpgr)) +
  geom_smooth(aes(color = country), method = "loess", se = FALSE)

lattice v. ggplot2: tables

# Initialize gridExtra library
library(gridExtra)
# Create 3 plots to combine in a table. Each plot maps country to a different
# aesthetic in its geom layer; the base aes() carries only x and y (the empty
# `color=`, `linetype=` and `shape=` arguments mapped nothing and were dropped).
plot1 <- ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(aes(color = country))
plot2 <- ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_line(aes(linetype = country))
plot3 <- ggplot(data = pigs.data, aes(x = year, y = outlays)) +
  geom_point(aes(shape = country))
# Call grid.arrange to stack the three plots in a single column
grid.arrange(plot1, plot2, plot3, nrow = 3, ncol = 1)

Exporting

Two basic image types

1) Raster/Bitmap (.png, .jpeg)

Every pixel of a plot contains its own separate coding; not so great if you want to resize the image

# Open a PNG device so the file format matches the ".png" file name
# (the jpeg() device would have written JPEG data into example.png);
# width= and height= are left blank as in the original, to be filled in
png(filename="example.png", width=, height=)
plot(x,y)
# Close the device so the file is written out
dev.off()

2) Vector (.pdf, .ps)

Every element of a plot is encoded with a function that gives its coding conditional on several factors; great for resizing

# pdf() names its output path `file` (it has no `filename` argument);
# width= and height= are left blank as in the original, to be filled in
pdf(file="example.pdf", width=, height=)
plot(x,y)
# Close the device so the file is written out
dev.off()

Exporting with lattice v. ggplot

# Assume we saved our plot as an object called example.plot

# lattice: open a pdf device, print the plot into it, then close the device.
# Extra arguments are handed on to pdf(), whose path argument is `file`.
trellis.device(device="pdf", file="example.pdf")
print(example.plot)
dev.off()

# ggplot2: ggsave() opens and closes the device itself;
# scale=, width= and height= are left blank as in the original, to be filled in
ggsave(filename="example.pdf", plot=example.plot, scale=, width=, height=) # ggplot2

Breakout!

1) Not all variable types are suitable for representation by every ggplot aesthetic. What kinds of variables can the aesthetics color, size, and shape meaningfully represent?

2) Using ggplot2, create a trellis plot where, for a given country, each panel uses a) HOLLOW CIRCLES to plot real GDP growth over time, and b) a red LOESS smoother without standard errors to plot the trend in unemployment over time. BONUS: limit the years shown to the period from 2000 to 2010, and turn off the grey background.

Breakout Answers!